{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examples and Exercises from Think Stats, 2nd Edition\n",
    "\n",
    "http://thinkstats2.com\n",
    "\n",
    "Copyright 2016 Allen B. Downey\n",
    "\n",
    "MIT License: https://opensource.org/licenses/MIT\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "import random\n",
    "\n",
    "import thinkstats2\n",
    "import thinkplot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hypothesis testing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following is a version of `thinkstats2.HypothesisTest` with just the essential methods:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class HypothesisTest(object):\n",
    "\n",
    "    def __init__(self, data):\n",
    "        self.data = data\n",
    "        self.MakeModel()\n",
    "        self.actual = self.TestStatistic(data)\n",
    "\n",
    "    def PValue(self, iters=1000):\n",
    "        self.test_stats = [self.TestStatistic(self.RunModel()) \n",
    "                           for _ in range(iters)]\n",
    "\n",
    "        count = sum(1 for x in self.test_stats if x >= self.actual)\n",
    "        return count / iters\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        raise UnimplementedMethodException()\n",
    "\n",
    "    def MakeModel(self):\n",
    "        pass\n",
    "\n",
    "    def RunModel(self):\n",
    "        raise UnimplementedMethodException()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And here's an example that uses it to compute the p-value of an experiment where we toss a coin 250 times and get 140 heads."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CoinTest(HypothesisTest):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        heads, tails = data\n",
    "        test_stat = abs(heads - tails)\n",
    "        return test_stat\n",
    "\n",
    "    def RunModel(self):\n",
    "        heads, tails = self.data\n",
    "        n = heads + tails\n",
    "        sample = [random.choice('HT') for _ in range(n)]\n",
    "        hist = thinkstats2.Hist(sample)\n",
    "        data = hist['H'], hist['T']\n",
    "        return data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The p-value turns out to be about 7%, which is considered on the border of statistical significance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.045"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ct = CoinTest((140, 110))\n",
    "pvalue = ct.PValue()\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Permutation test\n",
    "\n",
    "To compute the p-value of an observed difference in means, we can assume that there is no difference between the groups and generate simulated results by shuffling the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DiffMeansPermute(thinkstats2.HypothesisTest):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        group1, group2 = data\n",
    "        test_stat = abs(group1.mean() - group2.mean())\n",
    "        return test_stat\n",
    "\n",
    "    def MakeModel(self):\n",
    "        group1, group2 = self.data\n",
    "        self.n, self.m = len(group1), len(group2)\n",
    "        self.pool = np.hstack((group1, group2))\n",
    "\n",
    "    def RunModel(self):\n",
    "        np.random.shuffle(self.pool)\n",
    "        data = self.pool[:self.n], self.pool[self.n:]\n",
    "        return data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's an example where we test the observed difference in pregnancy length for first babies and others."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import first\n",
    "\n",
    "live, firsts, others = first.MakeFrames()\n",
    "data = firsts.prglngth.values, others.prglngth.values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The p-value is about 17%, which means it is plausible that the observed difference is just the result of random sampling, and might not be generally true in the population."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.189"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ht = DiffMeansPermute(data)\n",
    "pvalue = ht.PValue()\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's the distrubution of the test statistic (the difference in means) over many simulated samples:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEKCAYAAAAB0GKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAGGJJREFUeJzt3X+w5XV93/Hnmx+7JMgPza4ZZVkXumsVMQFzxZiMqSgmi52yWmgAtdKECVVDDSFhiqOSlrSTGNoxmjIha2MQMwQI1u6OLiUJi2maAboXcZHFrl5XLReYsKJgAgpS3/3j+73u2bPn3HPuOedzfj4fM3f2nO/3c773vd89e1/38/18P58TmYkkSZ0cNuoCJEmTwcCQJHXFwJAkdcXAkCR1xcCQJHXFwJAkdcXAkCR1xcCQJHXFwJAkdeWIURewUmvWrMkNGzaMugxJmij33nvvNzNzbT/HmLjA2LBhA/Pz86MuQ5ImSkR8o99jeElKktQVA0OS1BUDQ5LUFQNDktQVA0OS1JVigRERH4+IxyLigTb7IyI+GhELEXF/RLyqVC2SpP6V7GFcD2xeZv/ZwKb66xLgDwvWIknqU7F5GJn5PyNiwzJNtgA3ZPUZsXdHxPER8aLMfLRUTZodCwsLP3y8cePGEVYitbdt525uvm2eZ579/ope96mPvKtQRcsb5cS9E4CHGp4v1tsOCYyIuISqF8L69euHUpykydLrD191b5SBES22ZauGmbkV2AowNzfXso2kyecP/fE2ysBYBE5seL4OeGREtUgaIoPhgNWrjuT8s+fY8oafHHUpHY0yMLYDl0bETcBrgCcdv5Cmw6gCYZJ++E6iYoEREX8GvB5YExGLwG8BRwJk5nXADuDNwALwNPBLpWqRNDilw8Af+uOr5F1SF3bYn8Cvlvr+kvpjMKjZxC1vLmmwSgWDgTB9DAxpRvUTFIbBbDIwpBmzkqAwGNTIwJBmQKeQMBjUDQNDmjL2IFSKgSFNmEEMUhsU6oWBIU2QbTt3c8O2u3p6rSGhfhkY0pixB6FxZWBII9RPOLxzy2sNBA2VgSGNgHMgNIkMDGlIvHtJk87AkIZgucFqw0GTwsCQCnGynKaNgSENWKegcLBak8rAkAbEHoWmnYEh9cGQ0CwxMKQeGBSaRQaGtAIGhWaZgSF1YEhIFQNDWobzJ6QDDAypheV6FQaFZpWBITVp16tw/oRmnYEhNWgVFvYopIqBoZm33OUnexXSAQaGZpZLeEgrY2BoJnn3k7RyBoZmSrtehSEhdWZgaGZ495PUHwNDU+2Oexa46o/+2vkU0gAYGJpad9yzwKfv3MPRRx99yD57FdLKHVby4BGxOSL2RsRCRFzZYv/6iLgzIu6LiPsj4s0l69Fs+ez/2nvIttWrjjQspB4V62FExOHAtcCbgEVgV0Rsz8wHG5p9ALglM/8wIk4BdgAbStWk6dY4oP3UU08dtM+QkPpXsodxBrCQmfsy81ngJmBLU5sEjq0fHwc8UrAeTbGlAe12YxWGhdS/kmMYJwAPNTxfBF7T1ObfAX8REf8GOBo4q2A9mlLLzalYdeQRnH/23JArkqZTycCIFtuy6fmFwPWZ+Z8j4rXAJyPi1Mz8wUEHirgEuARg/fr1RYrVZGoVFu/c8lpesf7AQPfGjRuHXZY0lUoGxiJwYsPzdRx6yeliYDNAZt4VEUcBa4DHGhtl5lZgK8Dc3Fxz6GgGtZuAtzRWsbCwMKLKpOlVMjB2AZsi4iTgYeAC4G1Nbf4v8Ebg+oh4OXAUsL9gTZpwLhQojU6xwMjM5yLiUuB24HDg45m5JyKuBuYzczvwG8DHIuLXqS5X/avMtAehltqNVTgBTxqOohP3MnMH1a2yjduuanj8IPCzJWvQ5HP9J2k8ONNbY8vLT9J4MTA0lrz8JI0fA0Njx49JlcaTgaGx0m5ehUEhjV7RxQellTAspPFmYGgsGBbS+DMwNHKGhTQZHMPQyHRa3kPSeLGHoZExLKTJYg9DI7Ft5+6DwsLbZqXxZ2Bo6JrHLFavOpIbr7l4hBVJ6oaBoaFpN2bhBxxJk8HA0FC0W+rDMQtpchgYKs6lPqTpYGCoKOdYSNPD22pVjGEhTRcDQ8XcfNv8Qc8NC2myGRgqonmehWEhTT4DQwPXap6FYSFNPgNDA9Vq3MJ5FtJ0MDA0MA5yS9PNwNBAGBbS9DMwNBDeESVNPwNDffOOKGk2ONNbPWu1mKB3REnTyx6GeubKs9JssYehFWvXs3AxQWm6GRhasVZh4QcgSdPPS1JakXYfrSpp+tnDUNf8aFVpttnDUFdc8kNS0cCIiM0RsTciFiLiyjZtfjEiHoyIPRFxY8l61Dsn5kkqdkkqIg4HrgXeBCwCuyJie2Y+2NBmE/A+4Gcz89sR8cJS9ag3re6IMiyk2VSyh3EGsJCZ+zLzWeAmYEtTm18Brs3MbwNk5mMF61EPnJgnaUnJwDgBeKjh+WK9rdFLgZdGxN9GxN0RsbnVgSLikoiYj4j5/fv3FypXzbwjSlKjkndJRYtt2eL7bwJeD6wD/iYiTs3MJw56UeZWYCvA3Nxc8zFUgHdESWpWsoexCJzY8Hwd8EiLNtsy8/uZ+TVgL1WAaIS8I0pSKyUDYxewKSJOiohVwAXA9qY2/x04EyAi1lBdotpXsCZ1wTuiJLVSLDAy8zngUuB24EvALZm5JyKujohz6ma3A49HxIPAncAVmfl4qZrUmUuVS2qn6EzvzNwB7GjadlXD4wQur780Yq3GLQwLSUuc6S3AcQtJnRkYAhy3kNSZgSHHLSR1xcCYcY5bSOqWgTHDHLeQtBIGxoxqFRZeipK0HANjBhkWknqxbGBExPUNjy8qXo2KMywk9apTD6Pxp8ivlSxEw+Hts5J61SkwXBl2inj7rKR+dFoaZF1EfJRqqfKlxz+Ume8tVpkGyttnJfWrU2Bc0fB4vm0rjb3mS1HePitppZYNjMz8xLAKUTleipI0CB1vq42IiyLi8xHxVP01HxHvHEZx6p+XoiQNyrI9jDoYLqNafvzzVGMZrwKuiQgy84byJaofXoqSNCidehjvAd6amXdm5pOZ+URm7gTOrfdpjHkpStIgdQqMYzPz680b623HlihIg9PYu/BSlKR+dQqM7/a4TyPW3LvwUpSkfnW6rfblEXF/i+0BnFygHg2AA92SSugUGD8J/DjwUNP2lwCPFKlIfXHJckmldLok9WHgO5n5jcYv4Ol6n8aMa0VJKqVTYGzIzEMuSWXmPLChSEXqmXdFSSqpU2Actcy+HxlkIeqP4xaSSusUGLsi4leaN0bExcC9ZUpSL5ygJ6m0ToPelwGfjoi3cyAg5oBVwFtLFqaV8VKUpNI6LT74d8DPRMSZwKn15s/Ws701Jrbt3H3Qc8NCUgmdehgAZOadwJ2Fa1EPWo1dSFIJHVer1Xhz7ELSsHTVw9D42bZzNzffNu/YhaShsYcxoZrDwttoJZVWNDAiYnNE7I2IhYi4cpl250VERoTXU7rQPEFv9aojvRQlqbhil6Qi4nDgWuBNwCLVnI7tmflgU7tjgPcC95SqZdo0L1t+4zUXj7AaSbOiZA/jDGAhM/dl5rPATcCWFu1+G/g94HsFa5kaLlsuaVRKDnqfwMGr3C4Cr2lsEBGnAydm5mci4jcL1jLxWg1yO24haZhK9jCixbb84c6Iw6hWvP2NjgeKuCQi5iNifv/+/QMscXI0hwXYu5A0XCUDYxE4seH5Og7+DI1jqGaPfy4ivg78NLC91cB3Zm7NzLnMnFu7dm3BksdTq0Fub6GVNGwlL0ntAjZFxEnAw8AFwNuWdmbmk8CapecR8TngN+ul09XAQW5J46BYDyMznwMuBW4HvgTckpl7IuLqiDin1PedNg5ySxoXRWd6Z+YOYEfTtqvatH19yVomkZ9xIWmcONN7jLlOlKRxYmCMKT9uVdK4MTDGVPNAt2EhadQMjDHkQLekcWRgjBkHuiWNKwNjjDSHBdi7kDQ+DIwx0SosHOiWNE4MjDHRfAutYSFp3BgYY8BbaCVNAgNjxBzkljQpDIwRcza3pElhYIyYl6IkTQoDY4S27dx90HPDQtI4MzBGpNXYhSSNMwNjRBy7kDRpDIwR8DZaSZPIwBgBV6KVNIkMjCFzJVpJk8rAGCIn6UmaZAbGkLgSraRJZ2AMgSvRSpoGBsYQuBKtpGlgYBTmLbSSpoWBUZCD3JKmiYFRkLO5JU0TA6MQL0VJmjYGRiHO5pY0bQyMApzNLWkaGRgF2LuQNI0MjAGzdyFpWhkYA2bvQtK0KhoYEbE5IvZGxEJEXNli/+UR8WBE3B8Rd0TES0rWU5q9C0nTrFhgRMThwLXA2cApwIURcUpTs/uAucz8CeBW4PdK1VOak/QkTbuSPYwzgIXM3JeZzwI3AVsaG2TmnZn5dP30bmBdwXqKcSVaSbOgZGCcADzU8Hyx3tbOxcBtrXZExCURMR8R8/v37x9gif1zJVpJs6JkYESLbdmyYcQ7gDngmlb7M3NrZs5l5tzatWsHWGL/XIlW0qw4ouCxF4ETG56vAx5pbhQRZwHvB/5JZj5TsJ6Bc/kPSbOkZA9jF7ApIk6KiFXABcD2xgYRcTrwR8A5mflYwVoGzkFuSbOmWGBk5nPApcDtwJeAWzJzT0RcHRHn1M2uAZ4H/HlEfCEitrc53NhxJVpJs6bkJSkycwewo2nbVQ2Pzyr5/UvxUpSkWeRM7x44m1vSLDIwVsjZ3JJmlYGxAg50S5plBsYKONAtaZYZGCvgQLekWWZgdGnbzt0HPTcsJM0aA6MLrcYuJGnWGBhdcOxCkgyMjpykJ0kVA2MZ3kYrSQcYGMvwUpQkHWBgtOGlKEk6mIHRhutFSdLBDIwWXC9Kkg5lYLRg70KSDmVgNLF3IUmtGRhN7F1IUmsGRgN7F5LUnoFRc5KeJC3PwKg5SU+Slmdg4CQ9SeqGgYED3ZLUjZkPDAe6Jak7Mx0YDnRLUvdmOjAc6Jak7s1sYDjQLUkrM5OB4aUoSVq5mQwML0VJ0srNXGB4KUqSejNzgeGcC0nqTdHAiIjNEbE3IhYi4soW+1dHxM31/nsiYkPJegDnXEhSj4oFRkQcDlwLnA2cAlwYEac0NbsY+HZmbgQ+DHyoVD1QXY5qZO9CkrpXsodxBrCQmfsy81ngJmBLU5stwCfqx7cCb4yIKFVQ8+UoSVL3SgbGCcBDDc8X620t22Tmc8CTwI+VKObcX7vOy1GS1IeSgdGqp5A9tCEiLomI+YiY379/f9+FOdgtSStXMjAWgRMbnq8DHmnXJiKOAI4DvtV8oMzcmplzmTm3du3avopavepIexeS1IMjCh57F7ApIk4CHgYuAN7W1GY7cBFwF3AesDMzD+lhDMKnPvKuEofVmNq4ceOoS5CmTrHAyMznIuJS4HbgcODjmbknIq4G5jNzO/DHwCcjYoGqZ3FBqXokSf0p2cMgM3cAO5q2XdXw+HvAvyhZgyRpMGZuprckqTcGhiSpKwaGJKkrBoYkqSsGhiSpK1Fo2kMxEbEf+EaPL18DfHOA5QzaONc3zrXBeNc3zrXBeNc3zrXBeNfXXNtLMrOvmc8TFxj9iIj5zBzbad7jXN841wbjXd841wbjXd841wbjXV+J2rwkJUnqioEhSerKrAXG1lEX0ME41zfOtcF41zfOtcF41zfOtcF41zfw2mZqDEOS1LtZ62FIkno00YEREZsjYm9ELETElS32r46Im+v990TEhoZ976u3742IX+j2mKVri4g3RcS9EfHF+s83NLzmc/Uxv1B/vXAE9W2IiO821HBdw2t+qq57ISI+2uvH7fZR29sb6vpCRPwgIk6r9w3z3P1cRHw+Ip6LiPOa9l0UEV+pvy5q2D6sc9eytog4LSLuiog9EXF/RJzfsO/6iPhaw7k7rZfa+qmv3vf/GmrY3rD9pPp98JX6fbFqmLVFxJlN77vvRcRb6n3DPHeXR8SD9b/fHRHxkoZ9g3nfZeZEflEtmf5V4GRgFbAbOKWpzXuA6+rHFwA3149PqduvBk6qj3N4N8ccQm2nAy+uH58KPNzwms8BcyM+dxuAB9oc938Dr6X6JMXbgLOHWVtTm1cC+0Z07jYAPwHcAJzXsP0FwL76z+fXj58/5HPXrraXApvqxy8GHgWOr59f39h2FOeu3vcPbY57C3BB/fg64N3Drq3p3/hbwI+O4Nyd2fB9382B/7MDe99Ncg/jDGAhM/dl5rPATcCWpjZbgE/Uj28F3lgn6Bbgpsx8JjO/BizUx+vmmEVry8z7MnPpkwn3AEdFxOoeaihSX7sDRsSLgGMz866s3ok3AG8ZYW0XAn/Ww/fvu77M/Hpm3g/8oOm1vwD8ZWZ+KzO/DfwlsHmY565dbZn55cz8Sv34EeAxoL+Ptxxgfe3U/+5voHofQPW+GOq5a3IecFtmPt1DDf3Wd2fD972b6lNOYYDvu0kOjBOAhxqeL9bbWrbJzOeAJ4EfW+a13RyzdG2NzgXuy8xnGrb9Sd21/WCvly0GUN9JEXFfRPx1RLyuof1ih2MOo7Yl53NoYAzr3K30tcM8dx1FxBlUv8V+tWHzf6wvdXy4j19g+q3vqIiYj4i7ly75UP27P1G/D3o55qBqW3IBh77vRnHuLqbqMSz32hW/7yY5MFr9h2++5atdm5VuX6l+aqt2RrwC+BDwrxv2vz0zXwm8rv76lz3U1m99jwLrM/N04HLgxog4tstjlq6t2hnxGuDpzHygYf8wz91KXzvMc7f8AarfOj8J/FJmLv0m/T7gZcCrqS5r/NseahtEfeuzmrn8NuD3I+IfDeCYg6pt6dy9kupTRpcM/dxFxDuAOeCaDq9d8d95kgNjETix4fk64JF2bSLiCOA4quuL7V7bzTFL10ZErAM+DbwzM3/4W15mPlz/+ffAjVTd1F70XF99Ge/xuo57qX4LfWndfl3D60dy7mqH/JY35HO30tcO89y1VQf/Z4EPZObdS9sz89GsPAP8CaM5d0uXysjMfVRjUqdTrZV0fP0+WPExB1Vb7ReBT2fm9xtqHuq5i4izgPcD5zRcmRjc+67fwZhRfVF9vOw+qkHrpUGgVzS1+VUOHhy9pX78Cg4e9N5HNajU8ZhDqO34uv25LY65pn58JNU123eN4NytBQ6vH58MPAy8oH6+C/hpDgygvXmYtdXPD6P6j3DyqM5dQ9vrOXTQ+2tUA4/Prx8P9dwtU9sq4A7gshZtX1T/GcDvA787gnP3fGB1/XgN8BXqQV/gzzl40Ps9w6ytYfvdwJmjOndUAfpV6psXSrzvVlz4OH0Bbwa+XJ+k99fbrqZKV4Cj6jfTAtXdAI0/RN5fv24vDXcGtDrmMGsDPgA8BXyh4euFwNHAvcD9VIPhH6H+wT3k+s6tv/9u4PPAP2s45hzwQH3M/0I9MXTI/66vB+5uOt6wz92rqULrKeBxYE/Da3+5rnuB6rLPsM9dy9qAdwDfb3rfnVbv2wl8sa7vT4HnDfvcAT9T17C7/vPihmOeXL8PFur3xeoR/LtuoPrl6bCmYw7z3P0V8HcN/37bB/2+c6a3JKkrkzyGIUkaIgNDktQVA0OS1BUDQ5LUFQNDktQVA0NTJyKOj4j39PH6yyLiR1fQ/i0RccpK20XE1fVEq4G0l0ozMDSNjqda0bZXlwFdBwbVgm0dA6O5XWZelZl/NcD2UlHOw9DUiYillTz3Uq3SeUVEXEG1dMNqquUbfisijqZaGnsd1Uz/3wZ+HPhP9Wu/mZlnNh37d4FzgOeAvwD+G/AZqgUQn6Sa2PgG4BKqGbkLVOtWndai3QeBz2TmrV0et7H9q6kmIB4NPAO8MatlT6RijujcRJo4VwKnZubShyf9PLCJah2fALZHxM9RLXPySGb+07rdcZn5ZERcTrXEwzcbDxoRLwDeCrwsMzMijs/MJ+oP8/lMZt5at3siMz9WP/4PVLOS/6BFu5Ued6n9KuBm4PzM3FWvAfXdAudROoiXpDQLfr7+uo9qOZOXUQXIF4GzIuJDEfG6zHyyw3G+A3wP+K8R8c+Bdp95cGpE/E1EfBF4O9XaZYM47pJ/DDyambsAMvM7eWB5b6kYA0OzIIDfyczT6q+NmfnHmfll4KeoguN3IuKq5Q5S/1A+A/gU1fjC/2jT9Hrg0qyWU//3VGtfDeK4jX8fryVr6AwMTaO/B45peH478MsR8TyAiDghIl4YES+m+tyMP6Uat3hVm9dTv+55wHGZuYNqYPy0Nu2PAR6NiCOpehjt6lrpcZf8H+DF9TgGEXFMw/LeUjG+yTR1MvPxiPjbiHiA6uMyr4iIlwN31eMA/0C1OutG4JqI+AHVSq3vrg+xFbgtIh5tGvQ+BtgWEUdR/Zb/6/X2m4CPRcR7qT6i84PAPcA3qHovx7Rpt9LjLv39no2I84E/iIgfoRq/OKv+e0nFeJeUJKkrXpKSJHXFwJAkdcXAkCR1xcCQJHXFwJAkdcXAkCR1xcCQJHXFwJAkdeX/A/L/r4GXLTkhAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "ht.PlotCdf()\n",
    "thinkplot.Config(xlabel='test statistic',\n",
    "                   ylabel='CDF')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Under the null hypothesis, we often see differences bigger than the observed difference."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DiffMeansOneSided(DiffMeansPermute):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        group1, group2 = data\n",
    "        test_stat = group1.mean() - group2.mean()\n",
    "        return test_stat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the hypothesis under test is that first babies come late, the appropriate test statistic is the raw difference between first babies and others, rather than the absolute value of the difference.  In that case, the p-value is smaller, because we are testing a more specific hypothesis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.085"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ht = DiffMeansOneSided(data)\n",
    "pvalue = ht.PValue()\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "But in this example, the result is still not statistically significant."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Difference in standard deviation\n",
    "\n",
    "In this framework, it is easy to use other test statistics.  For example, if we think the variance for first babies might be higher, we can run this test:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DiffStdPermute(DiffMeansPermute):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        group1, group2 = data\n",
    "        test_stat = group1.std() - group2.std()\n",
    "        return test_stat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.089"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ht = DiffStdPermute(data)\n",
    "pvalue = ht.PValue()\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "But that's not statistically significant either."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testing correlation\n",
    "\n",
    "To check whether an observed correlation is statistically significant, we can run a permutation test with a different test statistic."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CorrelationPermute(thinkstats2.HypothesisTest):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        xs, ys = data\n",
    "        test_stat = abs(thinkstats2.Corr(xs, ys))\n",
    "        return test_stat\n",
    "\n",
    "    def RunModel(self):\n",
    "        xs, ys = self.data\n",
    "        xs = np.random.permutation(xs)\n",
    "        return xs, ys"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's an example testing the correlation between birth weight and mother's age."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cleaned = live.dropna(subset=['agepreg', 'totalwgt_lb'])\n",
    "data = cleaned.agepreg.values, cleaned.totalwgt_lb.values\n",
    "ht = CorrelationPermute(data)\n",
    "pvalue = ht.PValue()\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "The reported p-value is 0, which means that in 1000 trials we didn't see a correlation, under the null hypothesis, that exceeded the observed correlation.  That means that the p-value is probably smaller than $1/1000$, but it is not actually 0.\n",
    "\n",
    "To get a sense of how unexpected the observed value is under the null hypothesis, we can compare the actual correlation to the largest value we saw in the simulations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.06883397035410908, 0.03563281248793086)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ht.actual, ht.MaxTestStat()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testing proportions\n",
    "\n",
    "Here's an example that tests whether the outcome of a rolling a six-sided die is suspicious, where the test statistic is the total absolute difference between the observed outcomes and the expected long-term averages."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DiceTest(thinkstats2.HypothesisTest):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        observed = data\n",
    "        n = sum(observed)\n",
    "        expected = np.ones(6) * n / 6\n",
    "        test_stat = sum(abs(observed - expected))\n",
    "        return test_stat\n",
    "\n",
    "    def RunModel(self):\n",
    "        n = sum(self.data)\n",
    "        values = [1, 2, 3, 4, 5, 6]\n",
    "        rolls = np.random.choice(values, n, replace=True)\n",
    "        hist = thinkstats2.Hist(rolls)\n",
    "        freqs = hist.Freqs(values)\n",
    "        return freqs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's an example using the data from the book:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.1292"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = [8, 9, 19, 5, 8, 11]\n",
    "dt = DiceTest(data)\n",
    "pvalue = dt.PValue(iters=10000)\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The observed deviance from the expected values is not statistically significant.\n",
    "\n",
    "By convention, it is more common to test data like this using the chi-squared statistic:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DiceChiTest(DiceTest):\n",
    "\n",
    "    def TestStatistic(self, data):\n",
    "        observed = data\n",
    "        n = sum(observed)\n",
    "        expected = np.ones(6) * n / 6\n",
    "        test_stat = sum((observed - expected)**2 / expected)\n",
    "        return test_stat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using this test, we get a smaller p-value:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0415"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt = DiceChiTest(data)\n",
    "pvalue = dt.PValue(iters=10000)\n",
    "pvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Taking this result at face value, we might consider the data statistically significant, but considering the results of both tests, I would not draw any strong conclusions."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chi-square test of pregnancy length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PregLengthTest(thinkstats2.HypothesisTest):\n",
    "\n",
    "    def MakeModel(self):\n",
    "        firsts, others = self.data\n",
    "        self.n = len(firsts)\n",
    "        self.pool = np.hstack((firsts, others))\n",
    "\n",
    "        pmf = thinkstats2.Pmf(self.pool)\n",
    "        self.values = range(35, 44)\n",
    "        self.expected_probs = np.array(pmf.Probs(self.values))\n",
    "\n",
    "    def RunModel(self):\n",
    "        np.random.shuffle(self.pool)\n",
    "        data = self.pool[:self.n], self.pool[self.n:]\n",
    "        return data\n",
    "    \n",
    "    def TestStatistic(self, data):\n",
    "        firsts, others = data\n",
    "        stat = self.ChiSquared(firsts) + self.ChiSquared(others)\n",
    "        return stat\n",
    "\n",
    "    def ChiSquared(self, lengths):\n",
    "        hist = thinkstats2.Hist(lengths)\n",
    "        observed = np.array(hist.Freqs(self.values))\n",
    "        expected = self.expected_probs * len(lengths)\n",
    "        stat = sum((observed - expected)**2 / expected)\n",
    "        return stat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If we specifically test the deviations of first babies and others from the expected number of births in each week of pregnancy, the results are statistically significant with a very small p-value.  But at this point we have run so many tests, we should not be surprised to find at least one that seems significant."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p-value = 0.0\n",
      "actual = 101.50141482893264\n",
      "ts max = 26.83921472143575\n"
     ]
    }
   ],
   "source": [
    "data = firsts.prglngth.values, others.prglngth.values\n",
    "ht = PregLengthTest(data)\n",
    "p_value = ht.PValue()\n",
    "print('p-value =', p_value)\n",
    "print('actual =', ht.actual)\n",
    "print('ts max =', ht.MaxTestStat())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Power\n",
    "\n",
    "Here's the function that estimates the probability of a non-significant p-value even is there really is a difference between the groups."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def FalseNegRate(data, num_runs=1000):\n",
    "    \"\"\"Computes the chance of a false negative based on resampling.\n",
    "\n",
    "    data: pair of sequences\n",
    "    num_runs: how many experiments to simulate\n",
    "\n",
    "    returns: float false negative rate\n",
    "    \"\"\"\n",
    "    group1, group2 = data\n",
    "    count = 0\n",
    "\n",
    "    for i in range(num_runs):\n",
    "        sample1 = thinkstats2.Resample(group1)\n",
    "        sample2 = thinkstats2.Resample(group2)\n",
    "        ht = DiffMeansPermute((sample1, sample2))\n",
    "        p_value = ht.PValue(iters=101)\n",
    "        if p_value > 0.05:\n",
    "            count += 1\n",
    "\n",
    "    return count / num_runs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.691"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neg_rate = FalseNegRate(data)\n",
    "neg_rate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this example, the false negative rate is 70%, which means that the power of the test (probability of statistical significance if the actual difference is 0.078 weeks) is only 30%."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Exercise:** As sample size increases, the power of a hypothesis test increases, which means it is more likely to be positive if the effect is real. Conversely, as sample size decreases, the test is less likely to be positive even if the effect is real.\n",
    "\n",
    "To investigate this behavior, run the tests in this chapter with different subsets of the NSFG data. You can use `thinkstats2.SampleRows` to select a random subset of the rows in a DataFrame.\n",
    "\n",
    "What happens to the p-values of these tests as sample size decreases? What is the smallest sample size that yields a positive test?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution\n",
    "\n",
    "def RunTests(live, iters=1000):\n",
    "    \"\"\"Runs the tests from Chapter 9 with a subset of the data.\n",
    "\n",
    "    live: DataFrame\n",
    "    iters: how many iterations to run\n",
    "    \"\"\"\n",
    "    n = len(live)\n",
    "    firsts = live[live.birthord == 1]\n",
    "    others = live[live.birthord != 1]\n",
    "\n",
    "    # compare pregnancy lengths\n",
    "    data = firsts.prglngth.values, others.prglngth.values\n",
    "    ht = DiffMeansPermute(data)\n",
    "    p1 = ht.PValue(iters=iters)\n",
    "\n",
    "    data = (firsts.totalwgt_lb.dropna().values,\n",
    "            others.totalwgt_lb.dropna().values)\n",
    "    ht = DiffMeansPermute(data)\n",
    "    p2 = ht.PValue(iters=iters)\n",
    "\n",
    "    # test correlation\n",
    "    live2 = live.dropna(subset=['agepreg', 'totalwgt_lb'])\n",
    "    data = live2.agepreg.values, live2.totalwgt_lb.values\n",
    "    ht = CorrelationPermute(data)\n",
    "    p3 = ht.PValue(iters=iters)\n",
    "\n",
    "    # compare pregnancy lengths (chi-squared)\n",
    "    data = firsts.prglngth.values, others.prglngth.values\n",
    "    ht = PregLengthTest(data)\n",
    "    p4 = ht.PValue(iters=iters)\n",
    "\n",
    "    print('%d\\t%0.2f\\t%0.2f\\t%0.2f\\t%0.2f' % (n, p1, p2, p3, p4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9148\t0.16\t0.00\t0.00\t0.00\n",
      "4574\t0.24\t0.00\t0.00\t0.00\n",
      "2287\t0.80\t0.15\t0.00\t0.00\n",
      "1143\t0.64\t0.03\t0.00\t0.00\n",
      "571\t0.36\t0.78\t0.88\t0.03\n",
      "285\t0.71\t0.03\t0.53\t0.05\n",
      "142\t0.12\t0.62\t0.54\t0.07\n"
     ]
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "n = len(live)\n",
    "for _ in range(7):\n",
    "    sample = thinkstats2.SampleRows(live, n)\n",
    "    RunTests(sample)\n",
    "    n //= 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution\n",
    "\n",
    "# My results:\n",
    "\n",
    "# test1: difference in mean pregnancy length\n",
    "# test2: difference in mean birth weight\n",
    "# test3: correlation of mother's age and birth weight\n",
    "# test4: chi-square test of pregnancy length\n",
    "\n",
    "# n       test1   test2   test2   test4\n",
    "# 9148\t0.16\t0.00\t0.00\t0.00\n",
    "# 4574\t0.10\t0.01\t0.00\t0.00\n",
    "# 2287\t0.25\t0.06\t0.00\t0.00\n",
    "# 1143\t0.24\t0.03\t0.39\t0.03\n",
    "# 571\t0.81\t0.00\t0.04\t0.04\n",
    "# 285\t0.57\t0.41\t0.48\t0.83\n",
    "# 142\t0.45\t0.08\t0.60\t0.04\n",
    "\n",
    "# Conclusion: As expected, tests that are positive with large sample\n",
    "# sizes become negative as we take away data.  But the pattern is\n",
    "# erratic, with some positive tests even at small sample sizes.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Exercise:** In Section 9.3, we simulated the null hypothesis by permutation; that is, we treated the observed values as if they represented the entire population, and randomly assigned the members of the population to the two groups.\n",
    "\n",
    "An alternative is to use the sample to estimate the distribution for the population, then draw a random sample from that distribution. This process is called resampling. There are several ways to implement resampling, but one of the simplest is to draw a sample with replacement from the observed values, as in Section 9.10.\n",
    "\n",
    "Write a class named `DiffMeansResample` that inherits from `DiffMeansPermute` and overrides `RunModel` to implement resampling, rather than permutation.\n",
    "\n",
    "Use this model to test the differences in pregnancy length and birth weight. How much does the model affect the results?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution\n",
    "\n",
    "class DiffMeansResample(DiffMeansPermute):\n",
    "    \"\"\"Tests a difference in means using resampling.\"\"\"\n",
    "    \n",
    "    def RunModel(self):\n",
    "        \"\"\"Run the model of the null hypothesis.\n",
    "\n",
    "        returns: simulated data\n",
    "        \"\"\"\n",
    "        group1 = np.random.choice(self.pool, self.n, replace=True)\n",
    "        group2 = np.random.choice(self.pool, self.m, replace=True)\n",
    "        return group1, group2\n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution\n",
    "\n",
    "def RunResampleTest(firsts, others):\n",
    "    \"\"\"Tests differences in means by resampling.\n",
    "\n",
    "    firsts: DataFrame\n",
    "    others: DataFrame\n",
    "    \"\"\"\n",
    "    data = firsts.prglngth.values, others.prglngth.values\n",
    "    ht = DiffMeansResample(data)\n",
    "    p_value = ht.PValue(iters=10000)\n",
    "    print('\\ndiff means resample preglength')\n",
    "    print('p-value =', p_value)\n",
    "    print('actual =', ht.actual)\n",
    "    print('ts max =', ht.MaxTestStat())\n",
    "\n",
    "    data = (firsts.totalwgt_lb.dropna().values,\n",
    "            others.totalwgt_lb.dropna().values)\n",
    "    ht = DiffMeansPermute(data)\n",
    "    p_value = ht.PValue(iters=10000)\n",
    "    print('\\ndiff means resample birthweight')\n",
    "    print('p-value =', p_value)\n",
    "    print('actual =', ht.actual)\n",
    "    print('ts max =', ht.MaxTestStat())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "diff means resample preglength\n",
      "p-value = 0.1674\n",
      "actual = 0.07803726677754952\n",
      "ts max = 0.21393028325881147\n",
      "\n",
      "diff means resample birthweight\n",
      "p-value = 0.0003\n",
      "actual = 0.12476118453549034\n",
      "ts max = 0.1335955672457132\n"
     ]
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "RunResampleTest(firsts, others)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution\n",
    "\n",
    "# Conclusions: Using resampling instead of permutation has very\n",
    "# little effect on the results.\n",
    "\n",
    "# The two models are based on slightly difference assumptions, and in\n",
    "# this example there is no compelling reason to choose one or the other.\n",
    "# But in general p-values depend on the choice of the null hypothesis;\n",
    "# different models can yield very different results."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
